{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "excess-grass",
   "metadata": {
    "papermill": {
     "duration": 0.022097,
     "end_time": "2021-04-06T15:33:53.837895",
     "exception": false,
     "start_time": "2021-04-06T15:33:53.815798",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Pulls data from the Copernicus Data Provider (European Commission) available at cds.climate.copernicus.eu\n",
    "The output is a csv file containing the requested time series.\n",
    "\n",
    "Downloading the complete (daily) data set is currently not supported due to size limitations; only sampled data for every 10th day is available.\n",
    "\n",
    "To use the data set legally, you are required to create an account here: https://cds.climate.copernicus.eu/\n",
    "Then obtain your API key from your user profile and provide it as a parameter to this component.\n",
    "\n",
    "WARNING: This component currently only supports local execution (not Kubeflow/Airflow)\n",
    "\n",
    "Future work  \n",
    "[ ] Download the complete data set by creating multiple requests and then merging the results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "assigned-lottery",
   "metadata": {
    "papermill": {
     "duration": 6.449538,
     "end_time": "2021-04-06T15:34:00.298216",
     "exception": false,
     "start_time": "2021-04-06T15:33:53.848678",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "!pip3 install xarray==0.17.0 netcdf4==1.5.6 cdsapi==0.5.1 wget==3.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "american-split",
   "metadata": {
    "papermill": {
     "duration": 0.023171,
     "end_time": "2021-04-06T15:34:00.345966",
     "exception": false,
     "start_time": "2021-04-06T15:34:00.322795",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# @param api key in form UID:APIKey obtained from\n",
    "# https://cds.climate.copernicus.eu/\n",
    "# @param data_dir temporary data storage for local execution\n",
    "# @param file_name csv file name\n",
    "# @param start_year first year of data to request\n",
    "# @param end_year last year of data to request (inclusive)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "impaired-sharing",
   "metadata": {
    "papermill": {
     "duration": 0.270152,
     "end_time": "2021-04-06T15:34:00.631600",
     "exception": false,
     "start_time": "2021-04-06T15:34:00.361448",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import wget\n",
    "\n",
    "# Fetch the shared helper module so that a later cell can import\n",
    "# `unzip` from claimed_utils.\n",
    "claimed_utils_url = (\n",
    "    'https://raw.githubusercontent.com/elyra-ai/'\n",
    "    'component-library/master/claimed_utils.py'\n",
    ")\n",
    "wget.download(claimed_utils_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "stone-karen",
   "metadata": {
    "papermill": {
     "duration": 0.820596,
     "end_time": "2021-04-06T15:34:01.469339",
     "exception": false,
     "start_time": "2021-04-06T15:34:00.648743",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import cdsapi\n",
    "from claimed_utils import unzip\n",
    "import glob\n",
    "import os\n",
    "import pandas as pd\n",
    "import xarray as xr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "local-gather",
   "metadata": {
    "papermill": {
     "duration": 0.023729,
     "end_time": "2021-04-06T15:34:01.510228",
     "exception": false,
     "start_time": "2021-04-06T15:34:01.486499",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Component parameters, all read from environment variables.\n",
    "\n",
    "# The API key is looked up under the original name 'api key' first; 'apikey'\n",
    "# is accepted as a fallback because a variable name containing a space\n",
    "# cannot be exported from a POSIX shell. Stays None when neither is set.\n",
    "apikey = os.environ.get('api key', os.environ.get('apikey'))\n",
    "\n",
    "file_name = os.environ.get('file_name', 'data.csv')\n",
    "\n",
    "# Other cells build paths by plain string concatenation (data_dir + name),\n",
    "# so make sure a non-empty data_dir always ends with a path separator.\n",
    "data_dir = os.path.join(os.environ.get('data_dir', '../../data/'), '')\n",
    "\n",
    "# Original note on start_year: 'up to 1978' - presumably the earliest year\n",
    "# that can be requested; verify against the data set documentation.\n",
    "start_year = os.environ.get('start_year', '2017')\n",
    "end_year = os.environ.get('end_year', '2019')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "widespread-ghana",
   "metadata": {
    "papermill": {
     "duration": 0.021484,
     "end_time": "2021-04-06T15:34:01.546626",
     "exception": false,
     "start_time": "2021-04-06T15:34:01.525142",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# True when the target csv file is already present; the guarded cells\n",
    "# below (those starting with `if not skip:`) are then not executed.\n",
    "skip = os.path.exists(data_dir + file_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dense-dublin",
   "metadata": {
    "papermill": {
     "duration": 0.02158,
     "end_time": "2021-04-06T15:34:01.582940",
     "exception": false,
     "start_time": "2021-04-06T15:34:01.561360",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Write the client configuration file ~/.cdsapirc. The file is opened in\n",
    "# write mode, so any existing file at that path is overwritten.\n",
    "if not skip:\n",
    "    if not apikey:\n",
    "        # Fail with a clear message instead of the TypeError that the\n",
    "        # string concatenation below would raise for a missing key.\n",
    "        raise ValueError(\n",
    "            'No API key provided: set the \"api key\" environment '\n",
    "            'variable to UID:APIKey')\n",
    "    with open(os.path.expanduser('~/.cdsapirc'), \"w\") as myfile:\n",
    "        myfile.write(\"url: https://cds.climate.copernicus.eu/api/v2\\n\")\n",
    "        myfile.write(\"key: \" + apikey + \"\\n\")\n",
    "        # NOTE(review): 'verify: 0' presumably turns off TLS certificate\n",
    "        # verification in the client - confirm that this is intended.\n",
    "        myfile.write(\"verify: 0\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "collected-flavor",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every year from start_year through end_year (inclusive), as strings.\n",
    "first_year, last_year = int(start_year), int(end_year)\n",
    "year_range = [str(year) for year in range(first_year, last_year + 1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "joint-preference",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Request parameters handed to the retrieve call in a later cell.\n",
    "# The years come from year_range; all twelve months are requested.\n",
    "all_months = ['{:02d}'.format(month) for month in range(1, 13)]\n",
    "\n",
    "query = dict(\n",
    "    variable='volumetric_surface_soil_moisture',\n",
    "    type_of_sensor='passive',\n",
    "    time_aggregation='month_average',\n",
    "    year=year_range,\n",
    "    month=all_months,\n",
    "    day='01',\n",
    "    type_of_record='cdr',\n",
    "    version='v201912.0.0',\n",
    "    format='zip',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "australian-balance",
   "metadata": {
    "papermill": {
     "duration": 0.02449,
     "end_time": "2021-04-06T15:34:01.622599",
     "exception": false,
     "start_time": "2021-04-06T15:34:01.598109",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Request the 'satellite-soil-moisture' data set described by `query` and\n",
    "# store the result as download.zip under data_dir.\n",
    "if not skip:\n",
    "    zip_target = data_dir + 'download.zip'\n",
    "    cds_client = cdsapi.Client()\n",
    "    cds_client.retrieve('satellite-soil-moisture', query, zip_target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "liberal-california",
   "metadata": {
    "papermill": {
     "duration": 0.021448,
     "end_time": "2021-04-06T15:34:01.658999",
     "exception": false,
     "start_time": "2021-04-06T15:34:01.637551",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Delete .nc files already lying in data_dir before the archive is unpacked.\n",
    "if not skip:\n",
    "    stale_nc_paths = glob.glob(data_dir + '*.nc')\n",
    "    for stale_path in stale_nc_paths:\n",
    "        os.remove(stale_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "liable-preliminary",
   "metadata": {
    "papermill": {
     "duration": 0.021098,
     "end_time": "2021-04-06T15:34:01.695138",
     "exception": false,
     "start_time": "2021-04-06T15:34:01.674040",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "if not skip:\n",
    "    zip_path = data_dir + 'download.zip'\n",
    "    # presumably extracts zip_path into data_dir - verify against\n",
    "    # claimed_utils.unzip\n",
    "    unzip(data_dir, zip_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "drawn-defense",
   "metadata": {
    "papermill": {
     "duration": 0.026043,
     "end_time": "2021-04-06T15:34:01.736214",
     "exception": false,
     "start_time": "2021-04-06T15:34:01.710171",
     "status": "completed"
    },
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Convert every .nc file in data_dir into a csv file with the same base\n",
    "# name, then delete the .nc file. A csv file that already exists is kept.\n",
    "# Progress output: '.' per file processed, 'c' per csv already present;\n",
    "# set debug = True for full messages instead.\n",
    "debug = False\n",
    "\n",
    "if not skip:\n",
    "    for filename in os.listdir(data_dir):\n",
    "        if not filename.endswith(\".nc\"):\n",
    "            continue\n",
    "        if debug:\n",
    "            print('Starting to process {}...'.format(filename))\n",
    "        else:\n",
    "            print(\".\", end=\"\")\n",
    "        # Build every path with os.path.join so that they all agree with\n",
    "        # the directory that os.listdir scanned.\n",
    "        nc_path = os.path.join(data_dir, filename)\n",
    "        # splitext strips only the trailing extension.\n",
    "        filename_csv = os.path.splitext(filename)[0] + '.csv'\n",
    "        csv_path = os.path.join(data_dir, filename_csv)\n",
    "        if not os.path.exists(csv_path):\n",
    "            # Close the dataset before its file is removed below.\n",
    "            with xr.open_dataset(nc_path) as dset:\n",
    "                df = pd.DataFrame(dset['sm'].to_series()).reset_index()\n",
    "            df.to_csv(csv_path, index=False)\n",
    "        elif debug:\n",
    "            # Format the complete message; formatting only its second half\n",
    "            # would leave the {} placeholder unfilled.\n",
    "            message = 'CSV file {} already present, skipping...'\n",
    "            print(message.format(filename_csv))\n",
    "        else:\n",
    "            print(\"c\", end=\"\")\n",
    "        os.remove(nc_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fresh-niger",
   "metadata": {
    "papermill": {
     "duration": 8.281548,
     "end_time": "2021-04-06T15:34:10.032850",
     "exception": false,
     "start_time": "2021-04-06T15:34:01.751302",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Merge the per-file csv files (C3S-SOILMOISTURE-*.csv in data_dir) into\n",
    "# the single output file data_dir/file_name with one header line.\n",
    "# Guarded by `skip` so an existing output file is not truncated.\n",
    "if not skip:\n",
    "    merged_path = os.path.join(data_dir, file_name)\n",
    "    part_pattern = os.path.join(data_dir, 'C3S-SOILMOISTURE-*.csv')\n",
    "    # Sorted for a stable order; never read the output file itself.\n",
    "    part_paths = [\n",
    "        path for path in sorted(glob.glob(part_pattern))\n",
    "        if os.path.abspath(path) != os.path.abspath(merged_path)\n",
    "    ]\n",
    "    with open(merged_path, 'w') as merged:\n",
    "        merged.write('time,lat,lon,sm\\n')\n",
    "        for part_path in part_paths:\n",
    "            with open(part_path) as part:\n",
    "                next(part, None)  # drop the header line of this file\n",
    "                merged.writelines(part)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 17.49536,
   "end_time": "2021-04-06T15:34:10.489088",
   "environment_variables": {},
   "exception": null,
   "input_path": "/home/jovyan/work/examples/pipelines/pairs/component-library/input/input-climate-copernicus.ipynb",
   "output_path": "/home/jovyan/work/examples/pipelines/pairs/component-library/input/input-climate-copernicus.ipynb",
   "parameters": {},
   "start_time": "2021-04-06T15:33:52.993728",
   "version": "2.3.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
